package com.itcast.peft.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import lombok.extern.slf4j.Slf4j;

/**
 * Helper for extracting plain text from PDF files with Apache PDFBox.
 */
@Slf4j
public class PDFUtil {

    /**
     * Extracts the text content of a PDF file.
     *
     * <p>This method is best-effort: it never propagates an {@link IOException}.
     * If the file cannot be opened, parsed, or read, the failure is logged and
     * an empty string is returned instead.
     *
     * @param f the PDF file to read; a {@code null} value is tolerated and
     *          yields an empty string
     * @return the text produced by {@link PDFTextStripper#getText(PDDocument)},
     *         or an empty string when {@code f} is {@code null} or an
     *         {@link IOException} occurs
     */
    public static String pdfToString(File f) {
        // Guard against null up front: new FileInputStream((File) null) throws a
        // NullPointerException, which the IOException handler below would not catch.
        if (f == null) {
            log.warn("pdfToString called with a null file");
            return StringUtils.EMPTY;
        }

        // Open the file and load the PDF document; try-with-resources closes
        // both the document and the stream, in that order, on every exit path.
        // NOTE(review): PDDocument.load(File) may avoid buffering the whole
        // stream in memory - consider switching if large PDFs are expected.
        try (FileInputStream inputStream = new FileInputStream(f);
             PDDocument document = PDDocument.load(inputStream)) {
            // Create a PDFTextStripper instance to extract the text
            PDFTextStripper pdfStripper = new PDFTextStripper();
            // Extract the text from the PDF document
            return pdfStripper.getText(document);
        } catch (IOException e) {
            // Covers a missing/unreadable file as well as malformed PDF content.
            log.error("IOException in pdf2string", e);
            return StringUtils.EMPTY;
        }
    }
}